{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7e004ec-3b21-469d-af53-04883a84b5eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "\n",
    "import statsmodels.api as sm\n",
    "from patsy import dmatrices   \n",
    "import matplotlib.pyplot as plt\n",
    "import string\n",
    "import statsmodels.formula.api as smf\n",
    "from xlsxwriter import Workbook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39e1cba3-0821-4720-b5c0-d444baf10b72",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "DATA_PATH = Path(r\"E:\\\\df.xlsx\")\n",
    "\n",
    "COL_YEAR     = \"year\"\n",
    "COL_SIDO     = \"sido\"\n",
    "COL_HOME     = \"homeownership\"      \n",
    "COL_OCC      = \"occupation\"         \n",
    "COL_INCOME1  = \"income_1\"           \n",
    "COL_EDU      = \"education\"\n",
    "COL_SEX      = \"sex\"\n",
    "COL_AGE      = \"age\"\n",
    "COL_IDEO     = \"ideology\"\n",
    "COL_MARRIAGE = \"marriage\"           \n",
    "COL_RELIGION = \"religion\"          \n",
    "\n",
    "COL_TOTALMIN = \"total_minutes\"\n",
    "COL_HOUR     = \"hour\"\n",
    "COL_MIN      = \"minute\"\n",
    "\n",
    "OUTCOMES = [\n",
    "    \"happy\", \"relaxed\", \"energetic\",\n",
    "    \"worried\", \"sad\", \"depressed\",\n",
    "    \"angry\", \"stressed\", \"tired\", \"lonely\"\n",
    "]\n",
    "\n",
    "TARGET_TERMS = [\"ln_commute\", COL_HOME, \"ln_commute:homeownership\"]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8651cef-97e2-4fa1-a23e-0bc93d903dba",
   "metadata": {},
   "source": [
    "## Analysis 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2d5500a-e2f6-41b2-9e8f-cc00c09d503a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1) data load\n",
    "# ------------------------------------------------------------\n",
    "df = pd.read_excel(DATA_PATH)\n",
    "\n",
    "# ------------------------------------------------------------\n",
    "# 2) total minutes: if total_minutesm use it, else hour+minute\n",
    "# ------------------------------------------------------------\n",
    "def build_minutes(_df: pd.DataFrame) -> pd.Series:\n",
    "    if COL_TOTALMIN in _df.columns and _df[COL_TOTALMIN].notna().any():\n",
    "        return pd.to_numeric(_df[COL_TOTALMIN], errors=\"coerce\")\n",
    "    elif (COL_HOUR in _df.columns) and (COL_MIN in _df.columns):\n",
    "        h = pd.to_numeric(_df[COL_HOUR], errors=\"coerce\")\n",
    "        m = pd.to_numeric(_df[COL_MIN],  errors=\"coerce\")\n",
    "        minutes = np.where(h.notna() & m.notna(), h*60 + m, np.nan)\n",
    "        return pd.Series(minutes, index=_df.index)\n",
    "    else:\n",
    "        raise ValueError(\"통근시간 원자료(total_minutes 또는 hour/minute)가 필요합니다.\")\n",
    "\n",
    "df[\"commute_min_raw\"] = build_minutes(df)\n",
    "\n",
    "# extreme value processing \n",
    "# df[\"commute_min\"] = df[\"commute_min_raw\"].clip(lower=0, upper=180)\n",
    "q99 = df[\"commute_min_raw\"].quantile(0.99)\n",
    "df[\"commute_min\"] = df[\"commute_min_raw\"].clip(lower=0, upper=q99)\n",
    "\n",
    "# log(0/negative -> NaN)\n",
    "commute = df[\"commute_min\"].copy()\n",
    "commute[(~np.isfinite(commute)) | (commute <= 0)] = np.nan\n",
    "df[\"ln_commute\"] = np.log(commute) \n",
    "\n",
    "# ------------------------------------------------------------\n",
    "# 3) Controls/Fixed Effects\n",
    "# ------------------------------------------------------------\n",
    "\n",
    "df[\"occupation_pm\"] = np.where(df[COL_OCC].isna(), np.nan, (df[COL_OCC] == 1).astype(float))\n",
    "\n",
    "df[COL_MARRIAGE] = pd.to_numeric(df[COL_MARRIAGE], errors=\"coerce\")\n",
    "\n",
    "\n",
    "year_d = pd.get_dummies(df[COL_YEAR].astype(\"category\"), prefix=\"year\", drop_first=True)\n",
    "sido_d = pd.get_dummies(df[COL_SIDO].astype(\"category\"), prefix=\"sido\", drop_first=True)\n",
    "rel_d  = pd.get_dummies(df[COL_RELIGION].astype(\"category\"), prefix=\"religion\", drop_first=True)\n",
    "\n",
    "\n",
    "CONTROLS = [COL_INCOME1, COL_EDU, \"occupation_pm\", COL_SEX, COL_AGE, COL_IDEO, COL_MARRIAGE]\n",
    "\n",
    "def fit_ols(yname: str):\n",
    "    # 설계행렬: 메인 변수 = ln_commute (단일)\n",
    "    X = pd.DataFrame({\n",
    "        \"ln_commute\": df[\"ln_commute\"]\n",
    "    })\n",
    "\n",
    "    # 통제/고정효과/종교 더미 추가 및 숫자형 강제\n",
    "    X = pd.concat([X,\n",
    "                   df[CONTROLS].apply(pd.to_numeric, errors=\"coerce\"),\n",
    "                   year_d, sido_d, rel_d], axis=1).astype(float)\n",
    "\n",
    "    # 상수항\n",
    "    X = sm.add_constant(X, has_constant=\"add\")\n",
    "\n",
    "    # 종속변수\n",
    "    y = pd.to_numeric(df[yname], errors=\"coerce\")\n",
    "\n",
    "    # 결측 제거\n",
    "    data = pd.concat([y.rename(yname), X], axis=1).dropna()\n",
    "    if data.shape[0] < 10:\n",
    "        raise ValueError(f\"{yname}: 표본이 부족합니다 (n={data.shape[0]}).\")\n",
    "\n",
    "    y_clean = data[yname].astype(float)\n",
    "    X_clean = data.drop(columns=[yname]).astype(float)\n",
    "\n",
    "    # OLS + HC1 \n",
    "    model = sm.OLS(y_clean, X_clean)\n",
    "    res = model.fit(cov_type=\"HC1\")\n",
    "    return res\n",
    "\n",
    "\n",
    "results = {}\n",
    "for y in OUTCOMES:\n",
    "    try:\n",
    "        results[y] = fit_ols(y)\n",
    "    except Exception as e:\n",
    "        print(f\"[WARN] {y} 적합 중 오류: {e}\")\n",
    "\n",
    "rows = []\n",
    "# 90% CI (z=1.645)\n",
    "z = 1.645\n",
    "\n",
    "# 관심 항목: ln_commute만\n",
    "TARGET_TERMS = [\"ln_commute\"]\n",
    "\n",
    "for y, res in results.items():\n",
    "    for term in TARGET_TERMS:\n",
    "        if term in res.params.index:\n",
    "            beta = float(res.params[term])\n",
    "            se   = float(res.bse[term])\n",
    "            lo, hi = beta - z*se, beta + z*se\n",
    "            rows.append({\"outcome\": y, \"term\": term, \"coef\": beta, \"ci_lo\": lo, \"ci_hi\": hi})\n",
    "\n",
    "coef_df = pd.DataFrame(rows, columns=[\"outcome\",\"term\",\"coef\",\"ci_lo\",\"ci_hi\"])\n",
    "\n",
    "if coef_df.empty:\n",
    "    print(\"[INFO] 관심계수/적합 결과가 없어 Figure 2를 건너뜁니다.\")\n",
    "else:\n",
    "    nrows, ncols = 2, 5\n",
    "    letters = list(string.ascii_lowercase)\n",
    "\n",
    "    fig, axes = plt.subplots(nrows, ncols, figsize=(22, 8), sharex=False)\n",
    "    axes = axes.flatten()\n",
    "\n",
    "    xmin = float(coef_df[\"ci_lo\"].min())\n",
    "    xmax = float(coef_df[\"ci_hi\"].max())\n",
    "    span = xmax - xmin\n",
    "    pad  = 0.1*span if np.isfinite(span) and span > 0 else 1.0\n",
    "    xmin, xmax = xmin - pad, xmax + pad\n",
    "\n",
    "    term_label = {\n",
    "        \"ln_commute\": \"ln(commute)\"\n",
    "    }\n",
    "    colors = {\n",
    "        \"ln_commute\": \"#1f77b4\"\n",
    "    }\n",
    "\n",
    "    for i, y in enumerate(OUTCOMES):\n",
    "        ax = axes[i]\n",
    "        sub = coef_df[(coef_df[\"outcome\"] == y) & (coef_df[\"term\"].isin(TARGET_TERMS))].copy()\n",
    "\n",
    "        if sub.empty:\n",
    "            ax.axis(\"off\")\n",
    "            ax.set_title(f\"({letters[i]}) {y} (no estimates)\", fontsize=14, loc=\"left\")\n",
    "            continue\n",
    "정\n",
    "        sub[\"order\"] = sub[\"term\"].map({t:j for j, t in enumerate(TARGET_TERMS)})\n",
    "        sub = sub.sort_values(\"order\")\n",
    "\n",
    "        ypos = np.arange(len(sub))[::-1]\n",
    "        for j, row in enumerate(sub.itertuples()):\n",
    "            ax.errorbar(row.coef, ypos[j],\n",
    "                        xerr=[[row.coef - row.ci_lo],[row.ci_hi - row.coef]],\n",
    "                        fmt='o', capsize=4, lw=1.8, ms=7, color=colors.get(row.term, \"black\"))\n",
    "\n",
    "            ax.text(row.coef, ypos[j] + 0.005, f\"{row.coef:.3f}\",\n",
    "                    ha=\"center\", va=\"bottom\", fontsize=14, fontweight=\"bold\", color=colors.get(row.term, \"black\"))\n",
    "\n",
    "        ax.axvline(0, ls=\"--\", lw=1, color=\"gray\")\n",
    "        ax.set_xlim(xmin, xmax)\n",
    "        ax.set_title(f\"({letters[i]}) {y}\", fontsize=24, loc=\"center\", pad=10)\n",
    "        ax.set_xlabel(\"\", fontsize=12)\n",
    "        ax.tick_params(axis='x', labelsize=12)\n",
    "        ax.grid(axis=\"x\", alpha=0.2)\n",
    "\n",
    "        col_idx = i % ncols\n",
    "        if col_idx == 0:\n",
    "            ax.set_yticks(ypos)\n",
    "            ax.set_yticklabels([term_label.get(t, t) for t in sub[\"term\"]], fontsize=20)\n",
    "        else:\n",
    "            ax.set_yticks(ypos)\n",
    "            ax.set_yticklabels([])\n",
    "\n",
    "\n",
    "    for k in range(len(OUTCOMES), nrows*ncols):\n",
    "        axes[k].axis(\"off\")\n",
    "\n",
    "    plt.tight_layout(pad=2.0, w_pad=2.5, h_pad=2.5)\n",
    "    plt.savefig(\"figure_5.png\", dpi=600, bbox_inches=\"tight\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0696ba38-5409-402e-9069-d1ee18dd5f01",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "9bf46c47-2515-4a62-b67c-2351370d63a6",
   "metadata": {},
   "source": [
    "## Analysis 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e1d8bdf-827f-4cfc-8fd6-8c39940d1a73",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "DATA_PATH = Path(r\"E:\\df.xlsx\")\n",
    "\n",
    "COL_YEAR     = \"year\"\n",
    "COL_SIDO     = \"sido\"\n",
    "COL_HOME     = \"homeownership\"     \n",
    "COL_OCC      = \"occupation\"         \n",
    "COL_INCOME1  = \"income_1\"           \n",
    "COL_EDU      = \"education\"\n",
    "COL_SEX      = \"sex\"\n",
    "COL_AGE      = \"age\"\n",
    "COL_IDEO     = \"ideology\"\n",
    "COL_MARRIAGE = \"marriage\"           \n",
    "COL_RELIGION = \"religion\"           \n",
    "\n",
    "COL_TOTALMIN = \"total_minutes\"\n",
    "COL_HOUR     = \"hour\"\n",
    "COL_MIN      = \"minute\"\n",
    "\n",
    "OUTCOMES = [\n",
    "    \"happy\", \"relaxed\", \"energetic\",\n",
    "    \"worried\", \"sad\", \"depressed\",\n",
    "    \"angry\", \"stressed\", \"tired\", \"lonely\"\n",
    "]\n",
    "\n",
    "TARGET_TERMS = [\"ln_commute\", COL_HOME, \"ln_commute:homeownership\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b81ec9b7-6b1e-49ae-9288-cdee23c5e228",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "df = pd.read_excel(DATA_PATH)\n",
    "\n",
    "def build_minutes(_df: pd.DataFrame) -> pd.Series:\n",
    "    if COL_TOTALMIN in _df.columns and _df[COL_TOTALMIN].notna().any():\n",
    "        return pd.to_numeric(_df[COL_TOTALMIN], errors=\"coerce\")\n",
    "    elif (COL_HOUR in _df.columns) and (COL_MIN in _df.columns):\n",
    "        h = pd.to_numeric(_df[COL_HOUR], errors=\"coerce\")\n",
    "        m = pd.to_numeric(_df[COL_MIN],  errors=\"coerce\")\n",
    "        minutes = np.where(h.notna() & m.notna(), h*60 + m, np.nan)\n",
    "        return pd.Series(minutes, index=_df.index)\n",
    "    else:\n",
    "        raise ValueError(\"통근시간 원자료(total_minutes 또는 hour/minute)가 필요합니다.\")\n",
    "\n",
    "df[\"commute_min_raw\"] = build_minutes(df)\n",
    "\n",
    "# df[\"commute_min\"] = df[\"commute_min_raw\"].clip(lower=0, upper=180)\n",
    "q99 = df[\"commute_min_raw\"].quantile(0.99)\n",
    "df[\"commute_min\"] = df[\"commute_min_raw\"].clip(lower=0, upper=q99)\n",
    "\n",
    "commute = df[\"commute_min\"].copy()\n",
    "commute[(~np.isfinite(commute)) | (commute <= 0)] = np.nan\n",
    "df[\"ln_commute\"] = np.log(commute)  # divide-by-zero 경고 없음\n",
    "\n",
    "df[\"occupation_pm\"] = np.where(df[COL_OCC].isna(), np.nan, (df[COL_OCC] == 1).astype(float))\n",
    "\n",
    "df[COL_MARRIAGE] = pd.to_numeric(df[COL_MARRIAGE], errors=\"coerce\")\n",
    "\n",
    "year_d = pd.get_dummies(df[COL_YEAR].astype(\"category\"), prefix=\"year\", drop_first=True)\n",
    "sido_d = pd.get_dummies(df[COL_SIDO].astype(\"category\"), prefix=\"sido\", drop_first=True)\n",
    "rel_d  = pd.get_dummies(df[COL_RELIGION].astype(\"category\"), prefix=\"religion\", drop_first=True)\n",
    "\n",
    "CONTROLS = [COL_INCOME1, COL_EDU, \"occupation_pm\", COL_SEX, COL_AGE, COL_IDEO, COL_MARRIAGE]\n",
    "\n",
    "def fit_ols(yname: str):\n",
    "    # 설계행렬: 메인 3변수\n",
    "    X = pd.DataFrame({\n",
    "        \"ln_commute\": df[\"ln_commute\"],\n",
    "        COL_HOME: pd.to_numeric(df[COL_HOME], errors=\"coerce\")\n",
    "    })\n",
    "    X[\"ln_commute:homeownership\"] = X[\"ln_commute\"] * X[COL_HOME]\n",
    "\n",
    "    X = pd.concat([X,\n",
    "                   df[CONTROLS].apply(pd.to_numeric, errors=\"coerce\"),\n",
    "                   year_d, sido_d, rel_d], axis=1).astype(float)\n",
    "\n",
    "    X = sm.add_constant(X, has_constant=\"add\")\n",
    "\n",
    "    y = pd.to_numeric(df[yname], errors=\"coerce\")\n",
    "\n",
    "    data = pd.concat([y.rename(yname), X], axis=1).dropna()\n",
    "    if data.shape[0] < 10:\n",
    "        raise ValueError(f\"{yname}: 표본이 부족합니다 (n={data.shape[0]}).\")\n",
    "\n",
    "    y_clean = data[yname].astype(float)\n",
    "    X_clean = data.drop(columns=[yname]).astype(float)\n",
    "\n",
    "    model = sm.OLS(y_clean, X_clean)\n",
    "    res = model.fit(cov_type=\"HC1\")\n",
    "    return res\n",
    "\n",
    "results = {}\n",
    "for y in OUTCOMES:\n",
    "    try:\n",
    "        results[y] = fit_ols(y)\n",
    "    except Exception as e:\n",
    "        print(f\"[WARN] {y} 적합 중 오류: {e}\")\n",
    "\n",
    "rows = []\n",
    "# 90% CI (z=1.645)\n",
    "z = 1.645\n",
    "\n",
    "for y, res in results.items():\n",
    "    for term in TARGET_TERMS:\n",
    "        if term in res.params.index:\n",
    "            beta = float(res.params[term])\n",
    "            se   = float(res.bse[term])\n",
    "            lo, hi = beta - z*se, beta + z*se\n",
    "            rows.append({\"outcome\": y, \"term\": term, \"coef\": beta, \"ci_lo\": lo, \"ci_hi\": hi})\n",
    "\n",
    "coef_df = pd.DataFrame(rows, columns=[\"outcome\",\"term\",\"coef\",\"ci_lo\",\"ci_hi\"])\n",
    "\n",
    "if coef_df.empty:\n",
    "    print(\"[INFO] 관심계수/적합 결과가 없어 Figure 2를 건너뜁니다.\")\n",
    "else:\n",
    "    nrows, ncols = 2, 5\n",
    "    letters = list(string.ascii_lowercase)\n",
    "\n",
    "    fig, axes = plt.subplots(nrows, ncols, figsize=(22, 8), sharex=False)\n",
    "    axes = axes.flatten()\n",
    "\n",
    "    # 전역 x축 범위\n",
    "    xmin = float(coef_df[\"ci_lo\"].min())\n",
    "    xmax = float(coef_df[\"ci_hi\"].max())\n",
    "    span = xmax - xmin\n",
    "    pad  = 0.1*span if np.isfinite(span) and span > 0 else 1.0\n",
    "    xmin, xmax = xmin - pad, xmax + pad\n",
    "\n",
    "    term_label = {\n",
    "        \"ln_commute\": \"ln(commute)\",\n",
    "        COL_HOME: \"Homeowner\",\n",
    "        \"ln_commute:homeownership\": \"ln(commute) × Homeowner\"\n",
    "    }\n",
    "    colors = {\n",
    "        \"ln_commute\": \"#1f77b4\",\n",
    "        COL_HOME: \"#2ca02c\",\n",
    "        \"ln_commute:homeownership\": \"#d62728\"\n",
    "    }\n",
    "\n",
    "    for i, y in enumerate(OUTCOMES):\n",
    "        ax = axes[i]\n",
    "        sub = coef_df[(coef_df[\"outcome\"] == y) & (coef_df[\"term\"].isin(TARGET_TERMS))].copy()\n",
    "\n",
    "        if sub.empty:\n",
    "            ax.axis(\"off\")\n",
    "            ax.set_title(f\"({letters[i]}) {y} (no estimates)\", fontsize=14, loc=\"left\")\n",
    "            continue\n",
    "\n",
    "        sub[\"order\"] = sub[\"term\"].map({t:j for j, t in enumerate(TARGET_TERMS)})\n",
    "        sub = sub.sort_values(\"order\")\n",
    "\n",
    "        ypos = np.arange(len(sub))[::-1]\n",
    "        for j, row in enumerate(sub.itertuples()):\n",
    "            ax.errorbar(row.coef, ypos[j],\n",
    "                        xerr=[[row.coef - row.ci_lo],[row.ci_hi - row.coef]],\n",
    "                        fmt='o', capsize=4, lw=1.5, ms=6, color=colors.get(row.term, \"black\"))\n",
    "\n",
    "        ax.axvline(0, ls=\"--\", lw=1, color=\"gray\")\n",
    "        ax.set_xlim(xmin, xmax)\n",
    "        ax.set_title(f\"({letters[i]}) {y}\", fontsize=24, loc=\"center\", pad=10)\n",
    "        ax.set_xlabel(\"\", fontsize=12)\n",
    "        ax.tick_params(axis='x', labelsize=12)\n",
    "        ax.grid(axis=\"x\", alpha=0.2)\n",
    "\n",
    "        col_idx = i % ncols\n",
    "        if col_idx == 0:\n",
    "            ax.set_yticks(ypos)\n",
    "            ax.set_yticklabels([term_label.get(t, t) for t in sub[\"term\"]], fontsize=20)\n",
    "        else:\n",
    "            ax.set_yticks(ypos)\n",
    "            ax.set_yticklabels([])\n",
    "\n",
    "    # 남는 축 끄기\n",
    "    for k in range(len(OUTCOMES), nrows*ncols):\n",
    "        axes[k].axis(\"off\")\n",
    "\n",
    "    plt.tight_layout(pad=2.0, w_pad=2.5, h_pad=2.5)\n",
    "    plt.savefig(\"figure_3.png\", dpi=600, bbox_inches=\"tight\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a5a7f18-3d3f-43ba-906f-931932d87903",
   "metadata": {},
   "source": [
    "## FUll results code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5472bcbb-7410-40f8-9566-d5783e0cda0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "if not results:\n",
    "    print(\"[INFO] 적합된 결과가 없어 출력을 건너뜁니다.\")\n",
    "else:\n",
    "    import numpy as np\n",
    "\n",
    "    # helper to tidy a single result into a DataFrame (90% CI to match your z=1.645 above)\n",
    "    def tidy_result(res, outcome_name, ci_alpha=0.10):\n",
    "        ci = res.conf_int(alpha=ci_alpha)\n",
    "        out = pd.DataFrame({\n",
    "            \"outcome\": outcome_name,\n",
    "            \"term\": res.params.index,\n",
    "            \"coef\": res.params.values,\n",
    "            \"se\": res.bse.values,\n",
    "            \"t\": res.tvalues,\n",
    "            \"p\": res.pvalues,\n",
    "            \"ci_lo\": ci[0].values,\n",
    "            \"ci_hi\": ci[1].values\n",
    "        })\n",
    "        # for consistent ordering, put constant last\n",
    "        const_last = out[out[\"term\"] == \"const\"]\n",
    "        others = out[out[\"term\"] != \"const\"]\n",
    "        out = pd.concat([others, const_last], axis=0, ignore_index=True)\n",
    "        return out\n",
    "\n",
    "    # 1) Console: full statsmodels summaries\n",
    "    print(\"\\n\" + \"=\"*80)\n",
    "    print(\"FULL REGRESSION SUMMARIES (HC1 robust)\")\n",
    "    print(\"=\"*80 + \"\\n\")\n",
    "    for y, res in results.items():\n",
    "        print(f\"\\n{'-'*30}  Outcome: {y}  {'-'*30}\\n\")\n",
    "        # Use .summary() for full table; .summary2() if you prefer a compact variant\n",
    "        print(res.summary())\n",
    "\n",
    "    # 2) Build tidy DataFrames and export to Excel\n",
    "    all_tidy = []\n",
    "    for y, res in results.items():\n",
    "        all_tidy.append(tidy_result(res, y, ci_alpha=0.10))\n",
    "    all_tidy = pd.concat(all_tidy, axis=0, ignore_index=True)\n",
    "\n",
    "    # also prepare a focal-terms-only table (your three key variables)\n",
    "    focal_terms = [\"ln_commute\", COL_HOME, \"ln_commute:homeownership\"]\n",
    "    focal = all_tidy[all_tidy[\"term\"].isin(focal_terms)].copy()\n",
    "\n",
    "    # Write each outcome to its own sheet + a stacked sheet + focal-only sheet\n",
    "    out_path = \"regression_results_full.xlsx\"\n",
    "    with pd.ExcelWriter(out_path, engine=\"xlsxwriter\") as writer:\n",
    "        # one sheet per outcome\n",
    "        for y in OUTCOMES:\n",
    "            if y in results:\n",
    "                out_y = all_tidy[all_tidy[\"outcome\"] == y].copy()\n",
    "                # sort by absolute t-stat, descending, optional but often useful\n",
    "                out_y = out_y.sort_values(by=\"t\", key=np.abs, ascending=False)\n",
    "                out_y.to_excel(writer, sheet_name=y[:31], index=False)\n",
    "        # stacked\n",
    "        all_tidy.sort_values([\"outcome\", \"term\"]).to_excel(writer, sheet_name=\"ALL_outcomes\", index=False)\n",
    "        # focal\n",
    "        focal.sort_values([\"outcome\", \"term\"]).to_excel(writer, sheet_name=\"FOCAL_terms\", index=False)\n",
    "\n",
    "    print(f\"\\n[INFO] Excel 파일 저장 완료: {out_path}\\n\")\n",
    "\n",
    "    # 3) Optional: pretty print a compact table of the three focal terms to console\n",
    "    print(\"\\n\" + \"=\"*80)\n",
    "    print(\"FOCAL TERMS (ln_commute, homeownership, ln_commute:homeownership) with 90% CI\")\n",
    "    print(\"=\"*80 + \"\\n\")\n",
    "    # order outcomes as in OUTCOMES and keep the focal term order\n",
    "    focal[\"term\"] = pd.Categorical(focal[\"term\"], categories=focal_terms, ordered=True)\n",
    "    focal = focal.sort_values([\"outcome\", \"term\"])\n",
    "    # format a clean view\n",
    "    focal_view = focal[[\"outcome\", \"term\", \"coef\", \"se\", \"t\", \"p\", \"ci_lo\", \"ci_hi\"]].copy()\n",
    "    # Rounded for console readability (adjust if you need more precision)\n",
    "    focal_view = focal_view.round({\"coef\": 4, \"se\": 4, \"t\": 3, \"p\": 3, \"ci_lo\": 4, \"ci_hi\": 4})\n",
    "    print(focal_view.to_string(index=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2501b839-8850-4b78-acfb-d9a9c61d1e69",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "da16d618-7240-49d8-9710-6c420b5c799f",
   "metadata": {},
   "source": [
    "### Predicted values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64e1b80a-b329-447d-b5ab-2f391bbfc41a",
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Predicted Effects\n",
    "# ------------------------------------------------------------\n",
    "nrows, ncols = 2, 5\n",
    "letters = list(string.ascii_lowercase)\n",
    "fig, axes = plt.subplots(nrows, ncols, figsize=(24, 12), sharex=False)\n",
    "axes = axes.flatten()\n",
    "\n",
    "# Prediction grid: 5분~150분까지 5분 단위\n",
    "grid_minutes = np.arange(5, 151, 5)\n",
    "ln_grid = np.log(grid_minutes)\n",
    "\n",
    "for i, y in enumerate(OUTCOMES):\n",
    "    ax = axes[i]\n",
    "    if y not in results:\n",
    "        ax.axis(\"off\")\n",
    "        ax.set_title(f\"({letters[i]}) {y} (no model)\", fontsize=16, loc=\"left\")\n",
    "        continue\n",
    "\n",
    "    res = results[y]\n",
    "    preds = []\n",
    "\n",
    "    X_mean = df[CONTROLS].mean(skipna=True)\n",
    "    year_vals = {c:0 for c in year_d.columns}\n",
    "    sido_vals = {c:0 for c in sido_d.columns}\n",
    "    religion_vals = {c:0 for c in rel_d.columns}\n",
    "\n",
    "    for h in [0, 1]:  # homeownership = 0, 1\n",
    "        for ln_c, m in zip(ln_grid, grid_minutes):\n",
    "            row = {\n",
    "                \"const\": 1.0,\n",
    "                \"ln_commute\": ln_c,\n",
    "                \"homeownership\": h,\n",
    "                \"ln_commute:homeownership\": ln_c * h\n",
    "            }\n",
    "            # controls at means\n",
    "            for c in CONTROLS:\n",
    "                row[c] = X_mean[c]\n",
    "            # FE/더미는 baseline=0\n",
    "            row.update(year_vals)\n",
    "            row.update(sido_vals)\n",
    "            row.update(religion_vals)\n",
    "\n",
    "            X_row = pd.Series(row)[res.params.index]\n",
    "            pred = res.get_prediction(X_row.values.reshape(1, -1))\n",
    "            # 90% CI\n",
    "            frame = pred.summary_frame(alpha=0.10)\n",
    "            preds.append({\n",
    "                \"home\": h,\n",
    "                \"minutes\": m,\n",
    "                \"mean\": frame[\"mean\"].iloc[0],\n",
    "                \"lo\": frame[\"mean_ci_lower\"].iloc[0],\n",
    "                \"hi\": frame[\"mean_ci_upper\"].iloc[0]\n",
    "            })\n",
    "\n",
    "    pred_df = pd.DataFrame(preds)\n",
    "\n",
    "    for h, label, style, color in [\n",
    "        (0, \"Non-homeowner\", \":\", \"blue\"),\n",
    "        (1, \"Homeowner\", \"-\", \"red\")\n",
    "    ]:\n",
    "        sub = pred_df[pred_df[\"home\"]==h]\n",
    "        if sub.empty:\n",
    "            continue\n",
    "        ax.plot(sub[\"minutes\"], sub[\"mean\"], label=label, color=color, lw=2, ls=style)\n",
    "        ax.fill_between(sub[\"minutes\"], sub[\"lo\"], sub[\"hi\"], color=color, alpha=0.2)\n",
    "\n",
    "    ax.set_title(f\"({letters[i]}) {y}\", fontsize=25, loc=\"center\", pad=13)\n",
    "    ax.set_xlabel(\"Commute time (minutes)\", fontsize=14)\n",
    "    ax.set_ylabel(\"Predicted outcome\", fontsize=14)\n",
    "    ax.tick_params(labelsize=13)\n",
    "    ax.grid(True, alpha=0.3)\n",
    "    ax.legend(fontsize=13, loc=\"best\")\n",
    "\n",
    "for k in range(len(OUTCOMES), nrows*ncols):\n",
    "    axes[k].axis(\"off\")\n",
    "\n",
    "plt.tight_layout(pad=3.0, w_pad=3.0, h_pad=3.0)\n",
    "plt.savefig(\"figure_4.png\", dpi=600, bbox_inches=\"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71bc872a-bf34-432e-a46d-0b434debbd88",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
